* Start from an empty dataset.
clear 

*do ${fmartorell_home}/top_program

* Close any log left open by an earlier (possibly aborted) run; otherwise
* -log using- below stops with "log file already open".
capture log close

*log using "${home}programs/process_report1_report2.log", replace
log using "${d1}log/process_report1_report2.log", replace

* NOTE(review): -set mem- only matters on old Stata releases; newer releases
* are understood to ignore it with a note. Confirm before removing.
set mem 1000m

/* ---------------------------------------------------------------------------
Get a dataset from "first TASP" file with just ID variables

Keeps only altpid, school, tspyr and semester, checks that those four
variables identify each record uniquely, and saves the result for the merge
onto the report 2 files.
---------------------------------------------------------------------------- */

* File names are quoted so that a ${d1} directory containing spaces still works.
*use ${fmartorell_home}remediation/data/first_tasp_stack, clear
use "${d1}data/first_tasp_stack", clear
keep altpid school tspyr semester
* Stop the run if any altpid-school-tspyr-semester combination is repeated.
bysort altpid school tspyr semester: assert _N==1
* save ${fmartorell_home}remediation/data/first_tasp_stack_fewvar, replace
save "${d1}data/first_tasp_stack_fewvar.dta", replace

/* ---------------------------------------------------------------------------
Get a dataset with JR/Sr status (from report 1)

For every year (190-205) and semester (1-4) for which a report 1 file exists,
keep one record per school and stack it into tmp/schooltype.dta with
  tspyr, semester  = the year/semester of the source file
  sr               = 1 if the school came from a senior-college file,
                     0 if it came from a junior-college file
The stacked file is kept sorted by school tspyr semester because it is later
used as the using file of an old-syntax -merge- on those variables.
---------------------------------------------------------------------------- */

* Remove the stack left by a previous run. -erase- is used instead of a shell
* call so the step does not depend on the operating system; -cap- keeps the
* run going when there is nothing to delete.
*!rm ${fmartorell_home}tmp/schooltype.dta
cap erase "${d1}tmp/schooltype.dta"
forvalues year=190/205 {
 forvalues sem=1/4 {

  * ----- Senior colleges ------------------------------------------------
  * Location of the source file depends on the year.
  if `year'<204  {
   local fileloc="${report1sr}"
  }
  else {
   *local fileloc="${fmartorell_home}data/highered"
   local fileloc="${d1}data/highered"
  }
  * One file name starts with a capital D, all others with a lower-case d.
  if `year'==198 & `sem'==1 {
   local d="D"
  }
  else {
   local d="d"
  }
  cap confirm file "`fileloc'/`d'1sr`year'_`sem'.dta"
  if _rc==0 {
   qui use school using "`fileloc'/`d'1sr`year'_`sem'.dta", clear 
   disp "`year' `sem' Senior"
   qui bysort school: keep if _n==1
   gen tspyr=`year'
   gen semester=`sem'
   gen byte sr=1
   * Append the stack built so far, if there is one yet.
   *cap confirm file ${fmartorell_home}tmp/schooltype.dta
   cap confirm file "${d1}tmp/schooltype.dta"
   if _rc==0 {
    *qui append using ${fmartorell_home}tmp/schooltype.dta
    qui append using "${d1}tmp/schooltype.dta"
   }
   * Keep the saved file sorted on the later merge keys even when a
   * senior-college file is the last one processed.
   sort school tspyr semester
   *save ${fmartorell_home}tmp/schooltype.dta, replace
   save "${d1}tmp/schooltype.dta", replace
  }

  * ----- Junior colleges ------------------------------------------------
  if `year'<204 & (`year'!=203 | `sem'!=2) {
   local fileloc="${report1jr}"
   local d="D"
  }
  else {
   *local fileloc="${fmartorell_home}data/highered"
   local fileloc="${d1}data/highered"
   local d="d"
  }
  cap confirm file "`fileloc'/`d'1jr`year'_`sem'.dta"
  if _rc==0 {
   qui use school using "`fileloc'/`d'1jr`year'_`sem'.dta", clear
   disp "`year' `sem' Junior"
   qui bysort school: keep if _n==1
   gen tspyr=`year'
   gen semester=`sem'
   gen byte sr=0
   * Same guard as in the senior branch: the stack does not exist yet when
   * the first file found is a junior-college file.
   cap confirm file "${d1}tmp/schooltype.dta"
   if _rc==0 {
    *qui append using ${fmartorell_home}tmp/schooltype.dta
    qui append using "${d1}tmp/schooltype.dta"
   }
   sort school tspyr semester
   *save ${fmartorell_home}tmp/schooltype.dta, replace 
   save "${d1}tmp/schooltype.dta", replace
  }

 }
}


/* -----------------------------------------------------------------------------------------------
Get report 2 files with unique ssn-school observations with DE info and info on sch and gpa
(for a measure of college attainment)

For every report 2 file that exists (years 190-205, sem 1-5) two files are written to tmp/:
  report2_<year>_<sem>.dta         one record per altpid-school
  report2_<year>_<sem>_unique.dta  one record per altpid (first record after the gsort below)
----------------------------------------------------------------------------------------------- */

foreach  y of numlist 203 202 204 205  190/201 {
 local year=`y'
 forvalues sem=1/5 {
  * Some source file names start with a capital D, the rest with a lower-case d.
  if `year'<=191 | (`year'==198 & `sem'>1) {
   local d="D"
  }
  else {
   local d="d"
  }
  if `year'>203 {
   *local fileloc="${fmartorell_home}/data/highered"
   local fileloc="${d1}data/highered"
  }
  else {
   local fileloc="${report2}"
  }
  cap confirm file "`fileloc'/`d'2sr`year'_`sem'.dta"
  if _rc==0 {
   qui use "`fileloc'/`d'2sr`year'_`sem'.dta", clear

   * Placeholder so that the *prov wildcard in -keep- below always matches
   * something, even in files with no <subject>prov variables.
   qui gen byte prov=.

   * Harmonize the course-grade variable names to grad<subject>; whichever
   * source name is absent simply fails quietly under -cap-.
   foreach v in math read writ {
    cap rename `v'_course_grade grad`v'
    cap rename gr1`v' grad`v'
   }
   cap gen byte objectiv=3  /* In years 204,205, objectiv variable isn't there */
   keep altpid school *prov sch gp* sex ethnic b*dat* grad* objectiv
   drop prov
   cap rename birthdat bdate

   * Tie-breaker that keeps the sort order the same on every run (very few
   * duplicates). It must be a long: a byte cannot hold values above 100, so
   * every later observation would get a missing obs and the record kept
   * among duplicates would no longer be reproducible.
   gen long obs=_n
   sort altpid school obs
   by altpid school: keep if _n==1

   foreach test in math read writ {
    cap gen `test'de=inrange(`test'prov,1,4)  /* note: this won't run for years with no de prov variables*/
    cap gen `test'de=. /* note: this won't happen if de variable created in previous line */
   }
   cap gen byte anysubde=mathde+readde+writde

   *Merge on whether school is a JC or a SR college
   gen tspyr=`year'
   * sem 5 is matched to the semester 2 school-type record for this merge
   * only; it is set back to 5 right after.
   qui gen semester=`sem' if `sem'<5
   qui replace semester=2 if `sem'==5
   sort school tspyr semester
   *qui merge school tspyr semester using ${fmartorell_home}tmp/schooltype.dta, uniqusing nokeep keep(sr)
   qui merge school tspyr semester using "${d1}tmp/schooltype.dta", uniqusing nokeep keep(sr)
   tab _merge
   drop _merge
   qui replace semester=5 if `sem'==5

   * Flag records that match the first-TASP ID file (only attempted before 204).
   sort altpid school tspyr semester
   if `year'<204 {
    *qui merge altpid school tspyr semester using ${fmartorell_home}remediation/data/first_tasp_stack_fewvar, unique nokeep keep(altpid)
    qui merge altpid school tspyr semester using "${d1}data/first_tasp_stack_fewvar", unique nokeep keep(altpid)
    gen byte mrgtofirst=_merge==3
   }
   else {
    gen byte mrgtofirst=0
   }
   gen byte seekdegree=inlist(objectiv,3,4)==1

   * Within altpid: records merging to the first-TASP file first, then those
   * with objectiv 3 or 4, then highest sch.
   gsort altpid -mrgtofirst -seekdegree -sch school

   * The grade variables were already renamed to grad<subject> right after
   * loading the file. A second pair of rename loops that used to sit here
   * was dead code (one line had no command name and failed silently under
   * -cap-), so it has been removed.
   keep altpid school mathde readde writde anysubde sch gp* sr sex ethnic bdate grad* mrgtofirst
   if `year'>203 {
    drop  mathde readde writde anysubde
   }
   cap rename gpe gpa
   *bysort altpid school: assert _N==1

   qui compress
   *save ${fmartorell_home}tmp/report2_`year'_`sem'.dta, replace 
   save "${d1}tmp/report2_`year'_`sem'.dta", replace
   *gsort altpid -mrgtofirst -sch school
   by altpid: keep if _n==1   /* keep obs that either merges to main dataset or has highest SCH */
   *qui save ${fmartorell_home}tmp/report2_`year'_`sem'_unique.dta, replace
   qui save "${d1}tmp/report2_`year'_`sem'_unique.dta", replace  
  } 

 }
}




# delimit ;

/* ---------------------------------------------------------------------------
Get small datasets of the higher ed (report 1) data
---------------------------------------------------------------------------- */;

/* These programs recreate the unique-ssn (one record per student) datasets of report 1.

Small mistakes are corrected (the detailed credit-hour definitions for junior colleges (JC)
start in 199(3); inter-institutional credits are included).

Report 1 for year 203 is also added.

Note: the variable labels and names of the credit-hour variables on the TSP data files
often don't make any sense.
*/;


# delimit ;
***************
for the jc's
***************;

/* clpsjrcoll <tspyear> <term>
   Collapse one junior-college report 1 file to one record per altpid (the
   record with the most total credits) and save it as
   data/highered/report1jc<tspyear>_<term>. Does nothing when the source file
   does not exist.
     tspyear  year code of the file
     term     term number of the file
   Drop any copy left in memory first so the do-file can be re-run in the
   same Stata session. */
cap program drop clpsjrcoll;
program define clpsjrcoll;
 args tspyear term;
 if `tspyear'<204 & (`tspyear'!=203 | `term'!=2) {;
  local fileloc="${report1jr}";
  local d="D";
 };
 else {;
  local fileloc="${d1}data/highered";
  local d="d";
 };
 cap confirm file "`fileloc'/`d'1jr`tspyear'_`term'.dta";
 if _rc==0 {;
  qui use "`fileloc'/`d'1jr`tspyear'_`term'.dta", clear;
  cap rename birthdat bdate;

  /* Up to 199 term 2 the file carries a single credit variable; afterwards
     total, developmental and academic credits are built from components. */
  if `tspyear'<199 | (`tspyear'==199 & `term'<=2) {;
    rename credit totcredit;
  };
  else {;
    qui gen totcredit=schuglim +devedsch+tottcsch+sch+credit;
    count if totcredit==.;

    gen devcredit=sch+devedsch  /* note: include hrs in excess of state limit */;

    gen acadcredit=schuglim+credit;

  };

  /* Tie-breaker to keep the sort order the same every time; long so that
     the observation number is stored exactly whatever the file size. */
  gen long obs=_n;

  /* Missing values sort after every number, so sorting on totcredit alone
     would pick a missing-credit record as the one "with most credits".
     Sort non-missing records last within altpid instead; a student whose
     records are all missing is still kept. */
  gen byte hascred=totcredit<.;
  sort altpid hascred totcredit obs;
  local fulln=_N;
  qui by altpid: keep if _n==_N  /* pick obs with most credits */;
  local frackept=_N/`fulln';
  disp "`tspyear' `term' Fraction kept=" %5.3f `frackept';

  /* Missing also satisfies >=, so it is excluded from the shares explicitly. */
  qui count if totcredit>=3 & hascred; local cred3=r(N)/_N;
  qui count if totcredit>=9 & hascred; local cred9=r(N)/_N;
  qui count if totcredit>=12 & hascred; local cred12=r(N)/_N;
  disp "`tspyear' `term':  >=3 cred=" %5.3f `cred3' 
			  "   >=9 cred=" %5.3f `cred9'
			  "   fulltime=" %5.3f `cred12';
  if `tspyear'<199 | (`tspyear'==199 & `term'<=2) {;
    keep altpid school totcredit type bdate ethnic sex;
  };
  else {;
   keep altpid school totcredit devcredit acadcredit type bdate ethnic sex;
  };
  qui compress;
  *qui save "$home/data/highered/report1jc`tspyear'_`term'", replace;
  qui save "${d1}data/highered/report1jc`tspyear'_`term'", replace;

 };
end;

***************
for the sr colleges
***************;

/* clpssrcoll <tspyear> <term>
   Collapse one senior-college report 1 file to one record per altpid (the
   record with the most total credits) and save it as
   data/highered/report1sr<tspyear>_<term>. Does nothing when the source file
   does not exist.
     tspyear  year code of the file
     term     term number of the file
   Drop any copy left in memory first so the do-file can be re-run in the
   same Stata session. */
cap program drop clpssrcoll;
program define clpssrcoll;
 args tspyear term;
 if `tspyear'<204  {;
  local fileloc="${report1sr}";
 };
 else {;
  local fileloc="${d1}data/highered";
 };
 /* One file name starts with a capital D, all others with a lower-case d. */
 if `tspyear'==198 & `term'==1 {;
  local d="D";
 };
 else {;
  local d="d";
 };
 cap confirm file "`fileloc'/`d'1sr`tspyear'_`term'.dta";
 if _rc==0 {;

  qui use "`fileloc'/`d'1sr`tspyear'_`term'.dta", clear;
  cap rename birthdat bdate;

  /* The variables that make up total credits differ by period. (An unused
     -cap confirm var sch_on- that stood here was removed: its return code
     was never read.) */
  if `tspyear'<198 {;
   qui gen totcredit=sch_on+sch_off;
  };
  else if `tspyear'==198 & `term'==1 {;
   qui gen totcredit=oncamp+offcamp+sch_und+sch_dev;
  };
  else if (`tspyear'==198 & `term'>1) | (`tspyear'==199 & `term'<3) {;
   qui gen totcredit=sch_on+sch_off+excess_u+excess_d;
  };
  else {;
   /* should equal nofund_c+ nofund_d+ nofund_i+ sch_c+ sch_d+sch_i
      (not always, mainly for grad students) */
   qui gen totcredit=sch_on+sch_off;

   qui gen devcredit=nofund_d+ sch_d;

   qui gen acadcredit=nofund_c+sch_c;
  };

  /* Records without a total are dropped, so the sort and the >= counts
     below never see a missing totcredit. */
  drop if totcredit==.;

  /* Tie-breaker to keep the sort order the same every time; long so that
     the observation number is stored exactly whatever the file size. */
  gen long obs=_n;

  sort altpid totcredit obs;
  local fulln=_N;
  qui by altpid: keep if _n==_N  /* keep obs with most credits */;
  local frackept=_N/`fulln';
  disp "`tspyear' `term'  Fraction kept=" %5.3f `frackept';
  qui count if totcredit>=3; local cred3=r(N)/_N;
  qui count if totcredit>=9; local cred9=r(N)/_N;
  qui count if totcredit>=12; local cred12=r(N)/_N;
  disp "`tspyear' `term':  >=3 cred=" %5.3f `cred3' 
			  "   >=9 cred=" %5.3f `cred9'
			  "   fulltime=" %5.3f `cred12';

  /* acadcredit/devcredit exist only for the periods where they were built. */
  cap confirm var acadcredit;
  if _rc==0 {;
   keep altpid school totcredit type bdate ethnic tutstat sex acadcredit devcredit;
  };
  else {;
   keep altpid school totcredit type bdate ethnic tutstat sex;
  };  
  /* Same storage clean-up as in the junior-college program. */
  qui compress;
  *qui save "$home/data/highered/report1sr`tspyear'_`term'", replace;
  qui save "${d1}data/highered/report1sr`tspyear'_`term'", replace;
 };
end;


/* Junior colleges: call clpsjrcoll for terms 1-4, and within each term for
   years 190-205. */
foreach trm of numlist 1/4 {;
 foreach yr of numlist 190/205 {;
  clpsjrcoll `yr' `trm';
 };
};

/* Senior colleges: call clpssrcoll for terms 1-3, and within each term for
   years 190-205. */
foreach trm of numlist 1/3 {;
 foreach yr of numlist 190/205 {;
  clpssrcoll `yr' `trm';
 };
};




/* Back to carriage-return delimiting, then close the log. */
# delimit cr
log close

